---
title: "Adults"
output: html_document
---

#Read in data and load libraries
```{r setup, include=FALSE}
library(DESeq2)
library("magrittr")
library("dplyr")
library("ggplot2")
library("reshape2")
library("RColorBrewer")
library("gplots")
library("Cairo")

matrix = read.table("adult2.counts.matrix") #RSEM output
samples = read.table("adults2.txt", sep="\t", header=TRUE) #Sample Data
samples$ID = samples$good


#Remove Loci with less than 10 reads across all samples
adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new)
matrix2 = round(matrix)

annotation = read.csv("trinotate_annotation_report", header=TRUE, sep=",")

```

#run DESEQ for all adults
```{r}
dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Sex + Genotype + Sex*Genotype)
dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)
```


#separate out genotype, sex, and interaction significant DE genes alpha = 0.05 and logfold min = 2
```{r}
res_geno <- results(dds, name="Genotype_BB_vs_AA", alpha=0.05) %>% as.data.frame
resx_geno <- res_geno %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame

res_sex <- results(dds, name="Sex_Male_vs_Female", alpha=0.05)  %>%  as.data.frame
resx_sex <- res_sex %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame

res_int <- results(dds, name="SexMale.GenotypeBB", alpha=0.05)
resx_int <- res_int %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame



```


#look at overall sample relationships using distance matrix and pca
```{r}


vsd <- vst(dds, blind=FALSE)


greeks=c(alpha='\u03b1', tau='\u03c4', sigma='\u03c3',
                         beta='\u03b2',
                         gamma='\u03b3')


levels(vsd$Genotype)  = c(paste0(greeks['alpha'],greeks['alpha']), paste0(greeks['beta'],greeks['beta']))


library("pheatmap")
sampleDists <- dist(t(assay(vsd)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$Sex,  vsd$Genotype,  sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)



library(vegan)

sampleDists <- dist(t(assay(vsd)), method = "manhattan")

adonis2(formula= sampleDists ~ samples$Genotype+ samples$Population + samples$Sex, method = "manhattan")



ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}



pcaData <- plotPCA(vsd, intgroup=c("Genotype", "Sex"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
good = ggplot(pcaData, aes(PC1, PC2, color=Genotype, shape=Sex)) +
  geom_point(size=4) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + scale_colour_manual(values=ggplotColours(n=3),labels=greeks) +
  coord_fixed()


cairo_pdf("adult_pca.pdf",width=8,height=8)
good
dev.off()


pcaData <- plotPCA(vsd, intgroup=c("Population", "Sex"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
pop = ggplot(pcaData, aes(PC1, PC2, color=Population, shape=Sex)) +
  geom_point(size=4) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) +
  coord_fixed()


cairo_pdf("adult_pop_pca.pdf",width=8,height=8)
pop
dev.off()



cairo_pdf("distance_matrix.pdf",width=10,height=8)

pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)
dev.off()
```




# Volcano and MA for sex
```{r}
resLFC_sex <- lfcShrink(dds, coef="Sex_Male_vs_Female", type="apeglm")
plotMA(resLFC_sex,0.05, ylim=c(-15,15))
library("EnhancedVolcano")
sex = EnhancedVolcano(resLFC_sex,
                lab= rownames(resLFC_sex),
    x = 'log2FoldChange',
    y = 'pvalue',
    xlim = c(-15, 15))

pdf("Sex_volcano.pdf",width=16,height=8)
sex
dev.off()
```

# Volcano and MA for genotype
```{r}
resLFC_geno <- lfcShrink(dds, coef="Genotype_BB_vs_AA", type="apeglm")
plotMA(resLFC_geno,0.05, ylim=c(-15,15))
library("EnhancedVolcano")
geno = EnhancedVolcano(resLFC_geno,
                lab= rownames(resLFC_geno),
    x = 'log2FoldChange',
    y = 'pvalue',
    xlim = c(-15, 15))

pdf("geno_volcano.pdf",width=16,height=8)
geno
dev.off()
```





#Get positions for all contigs
```{r}

library(dplyr)

gff3 = read.table("gmap.gff3", sep="\t", header=FALSE)
colnames(gff3) = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes")
gff3$attributes = as.character(gff3$attributes)
gff3$contig =  sapply(strsplit(gff3$attributes,";"), `[`, 2)
gff3$contig = sapply(strsplit(gff3$contig,"="), `[`, 2)


new_ordered = gff3[with(gff3, order(seqid, source, type, start, end)),]

no_dups = new_ordered %>% distinct(seqid, source, start, end, contig, .keep_all = T)

no_dups2 = new_ordered %>% distinct(contig, .keep_all = T)


#Generate general position categories for all contigs

for (i in seq(1,dim(no_dups2)[1])) {
  
  Contig <- no_dups2[i,10]

   if (no_dups2[i,1] == "LG1"  && no_dups2[i,4] > 8300000 && no_dups2[i,5] < 33500000){
    
    
    pos = c("LG1_inversion")

} else if (no_dups2[i,1] == "LG4"  && no_dups2[i,4] > 1000000 && no_dups2[i,5] < 8000000){
    
    
    pos = c("LG4_inversion")
    
} else if (no_dups2[i,1] == "LG4"  && no_dups2[i,4] > 22000000 && no_dups2[i,5] < 32000000) {
  
   pos = c("LG4_inversion2")

} else {

  pos = no_dups2[i,1]
}
     new.data <- cbind.data.frame(Contig,pos)

  
  if ((i==1)) {
   positions <- as.data.frame(new.data)             

  } else {  

      positions<-rbind(positions, new.data)
    
  }

  
} #end the i-loop



#Get information on genome positions for significant transcripts from different analyses
adults_new$Contig = rownames(adults_new)

positions_tested = subset(positions, positions$Contig %in% adults_new$Contig)

control = table(positions_tested$pos) 
write.table(control, file="rna_overall_pos", row.names = FALSE, sep=",")


sex_pos = subset(positions_tested, positions_tested$Contig %in% rownames(resx_sex))
sex = table(sex_pos$pos)  
write.table(sex, file="summary_sexsig_pos", row.names = FALSE, sep=",")

geno_pos = subset(positions_tested, positions_tested$Contig %in% rownames(resx_geno))
geno = table(geno_pos$pos)  
write.table(geno, file="summary_genosig_pos", row.names = FALSE, sep=",")


int_pos = subset(positions_tested, positions_tested$Contig %in% rownames(resx_int))
int = table(int_pos$pos)  
write.table(int, file="summary_intsig_pos", row.names = FALSE, sep=",")




```


#Run topGO
```{r}
library(topGO)
library(splitstackshape)

#read in GO mappings for tested loci

geneID2GO = readMappings("adult_tested_go.txt")

geneNames = names(geneID2GO)

#for_sex
sex = rownames(resx_sex)

geneList = factor(as.integer(geneNames %in% sex))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "Fisher" )
allRes_sex <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_sex$padjust<-p.adjust(allRes_sex$elimKS,method="BH")


#for geno
geno = rownames(resx_geno)

geneList = factor(as.integer(geneNames %in% geno))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim2 <- runTest(GOdata, algorithm = "elim", statistic = "fisher" )
allRes_geno2 <- GenTable(GOdata, elimKS = resultTopGO.elim2,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_geno2$padjust<-p.adjust(allRes_geno2$elimKS,method="BH")

```




#Do analysis for just males
```{r}
matrix = read.table("adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new)
matrix2 = round(matrix)



dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype)

colnames(dds)
#keep males
dds <- dds[,c(2,5,7,10,12,13,14,16)]


dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)

```

#separate out genotype, sex, and interaction significant DE genes alpha = 0.05 and logfold min = 2
```{r}
res_Mgeno <- results(dds, name="Genotype_BB_vs_AA", alpha=0.05)
resx_Mgeno <- res_Mgeno %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame
sig_Mgeno = resx_Mgeno[,c(2,6)]


```


#look at overall sample relationships for males using matrix and pca
```{r}
vsd <- vst(dds, blind=FALSE)

greeks=c(alpha='\u03b1', tau='\u03c4', sigma='\u03c3',
                         beta='\u03b2',
                         gamma='\u03b3')


levels(vsd$Genotype)  = c(paste0(greeks['alpha'],greeks['alpha']), paste0(greeks['beta'],greeks['beta']))



library("pheatmap")
sampleDists <- dist(t(assay(vsd)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$Sex,  vsd$Genotype, vsd$Population,  sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)


sampleDists <- dist(t(assay(vsd)), method = "manhattan")

samplesM = subset(samples, samples$Sex=="Male")

adonis2(formula= sampleDists ~ samplesM$Genotype, method = "manhattan")



plotPCA(vsd, intgroup=c("Genotype"))


cairo_pdf("pca_males.pdf",width=8,height=8)
plotPCA(vsd, intgroup=c("Genotype"))
dev.off()
```


#Volcano plot for male genotype
```{r}
resLFC_geno <- lfcShrink(dds, coef="Genotype_BB_vs_AA", type="apeglm")
plotMA(resLFC_geno,0.05, ylim=c(-15,15))
library("EnhancedVolcano")
geno = EnhancedVolcano(resLFC_geno,
                lab= rownames(resLFC_geno),
    x = 'log2FoldChange',
    y = 'pvalue',
    xlim = c(-15, 15))

pdf("geno_volcano_males.pdf",width=16,height=8)
geno
dev.off()
```


#Do just females
```{r}
matrix = read.table("adult2.counts.matrix")
samples = read.table("adults2.txt", sep="\t", header=TRUE)
samples$ID = samples$good


adults_new = matrix %>% mutate(Total = dplyr::select(., P10F:P9FD) %>% rowSums(na.rm = TRUE))
rownames(adults_new) = rownames(matrix)
adults_new = subset(adults_new, adults_new[ , 18] > 10, drop=FALSE ) 
adults_new = adults_new[,1:17]


matrix = as.matrix(adults_new)
matrix2 = round(matrix)



dds <- DESeqDataSetFromMatrix(countData = matrix2,
                              colData = samples,
                              design = ~ Genotype)

colnames(dds)
#keep females
dds <- dds[,c(1,3,4,6,8,9,11,15,17)]


dds = DESeq(dds)
resultsNames(dds)
plotDispEsts(dds)

```

#separate out genotype alpha = 0.05 and logfold min = 2
```{r}
res_Fgeno <- results(dds, name="Genotype_BB_vs_AA", alpha=0.05)
resx_Fgeno <- res_Fgeno %>% subset(padj < 0.05 & abs(log2FoldChange)>2) %>% as.data.frame

sig_Fgeno = resx_Fgeno[,c(2,6)]

```




#look at overall sample relationships for females using matrix and pca
```{r}
vsd <- vst(dds, blind=FALSE)
rld <- rlog(dds, blind=FALSE)

greeks=c(alpha='\u03b1', tau='\u03c4', sigma='\u03c3',
                         beta='\u03b2',
                         gamma='\u03b3')


levels(vsd$Genotype)  = c(paste0(greeks['alpha'],greeks['alpha']), paste0(greeks['beta'],greeks['beta']))



library("pheatmap")
sampleDists <- dist(t(assay(vsd)))

sampleDists <- dist(t(assay(vsd)), method = "manhattan")

samplesF = subset(samples, samples$Sex=="Female")

adonis2(formula= sampleDists ~ samplesF$Genotype + samplesF$Population,  method = "manhattan")



library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$Sex,  vsd$Genotype, vsd$Population,  sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

cairo_pdf("pca_females.pdf",width=8,height=8)
plotPCA(vsd, intgroup=c("Genotype"), )
dev.off()

```
#Volcano plot for female genotype
```{r}
resLFC_geno <- lfcShrink(dds, coef="Genotype_BB_vs_AA", type="apeglm")
plotMA(resLFC_geno,0.05, ylim=c(-15,15))
library("EnhancedVolcano")
geno = EnhancedVolcano(resLFC_geno,
                lab= rownames(resLFC_geno),
    x = 'log2FoldChange',
    y = 'pvalue',
    xlim = c(-15, 15))

pdf("geno_volcano_females.pdf",width=16,height=8)
geno
dev.off()
```


#topGO for male and female analyses
```{r}
library(topGO)
library(splitstackshape)



#read in GO mappings for tested contigs

geneID2GO = readMappings("adult_tested_go.txt")

geneNames = names(geneID2GO)

#for_mGeno
Mgeno = rownames(resx_Mgeno)

geneList = factor(as.integer(geneNames %in% Mgeno))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "Fisher" )
allRes_Mgeno <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_Mgeno$padjust<-p.adjust(allRes_Mgeno$elimKS,method="BH")


#for Fgeno
Fgeno = rownames(resx_Fgeno)

geneList = factor(as.integer(geneNames %in% Fgeno))
names(geneList) <- geneNames



GOdata <- new("topGOdata", ontology = "BP", allGenes = geneList,
                    annot = annFUN.gene2GO, gene2GO = geneID2GO)

resultTopGO.elim <- runTest(GOdata, algorithm = "elim", statistic = "fisher" )
allRes_Fgeno <- GenTable(GOdata, elimKS = resultTopGO.elim,
                   orderBy = "elimKS", 
                   topNodes = length(geneList))

allRes_Fgeno$padjust<-p.adjust(allRes_Fgeno$elimKS,method="BH")

```

